import jieba
from gensim import corpora,  models
# Demo: build a tf-idf representation of a tiny Chinese corpus.
# Pipeline: tokenize with jieba -> gensim Dictionary -> bag-of-words -> tf-idf.
sentences = ['我喜欢编程',
             '我的目标是写出优质代码',
             '我在大学学习编程',
             '我努力提升编程水平'
             ]

# Tokenize each sentence with jieba; each entry is one sentence's token list.
# (jieba.cut returns a generator, so materialize it with list().)
words_in_sentences = [list(jieba.cut(sentence.strip())) for sentence in sentences]

# Build the token <-> id dictionary.
# Named `dictionary` (not `dict`) to avoid shadowing the builtin.
dictionary = corpora.Dictionary(words_in_sentences)
print('词典：', dictionary.token2id)

# Convert each tokenized sentence into a bag-of-words (term-frequency) vector:
# a list of (token_id, count) pairs.
tf = [dictionary.doc2bow(words) for words in words_in_sentences]
print('词频：', tf)

# Fit the tf-idf model on the term-frequency corpus, then apply it to the
# same corpus to obtain (token_id, tf-idf weight) vectors.
tfidf_model = models.TfidfModel(tf)
tfidf = tfidf_model[tf]
print('tf-idf值：', list(tfidf))
